***************************************************************************

* Build HCPCS-year RVU lookup tables (long and wide) from the appended RVU file

	use "${intermediate_data}/RVU/RVUvalues_appended.dta", clear

	* Modifier handling: when a code-year has a modifier-26 row, retain only that
	* row for the code-year; rows with modifier 53 are always discarded.
	* (bysort is kept so the data end up sorted by hcpcs year, as before.)

	gen mod26 = (mod == "26")
	bysort hcpcs year: gegen has_mod_26 = max(mod26)
	keep if (mod26 | !has_mod_26) & mod != "53"

	* Long format: one row per hcpcs x year x place_of_service, where
	* place_of_service takes the stub suffix "F" (facilitytotal) or "O" (nonfactotal)

	rename (facilitytotal nonfactotal) (rvu_value_hcpcs_posF rvu_value_hcpcs_posO)
	keep hcpcs year rvu_value_hcpcs_posF rvu_value_hcpcs_posO
	greshape long rvu_value_hcpcs_pos, i(hcpcs year) j(place_of_service) string
	save "${temp}/rvu_values_hcpcs_pos_long.dta", replace

	* Wide format: one row per hcpcs x place_of_service, with one
	* rvu_value_hcpcs_pos<year> column per year

	keep hcpcs place_of_service year rvu_value_hcpcs_pos
	greshape wide rvu_value_hcpcs_pos, i(hcpcs place_of_service) j(year)
	save "${temp}/rvu_values_hcpcs_pos_wide.dta", replace

***************************************************************************

* Save lists of physicians in our primary analytic sample in the relevant age ranges
*
* Output: two NPI-year files (ages 40-55 and ages 56-70), each holding
* npi, year, age and a numeric specialty code cms_spec_code_num.
*
* The specialty string is encoded ONCE on the pooled 40-70 sample and the
* result is then split by age range. encode assigns integers alphabetically
* over the values present in memory, so encoding each age range separately
* could map the same specialty to different integers in the two files whenever
* a specialty appears in only one of them. Encoding on the pooled sample keeps
* the integer <-> specialty mapping (and the attached value label) identical
* across both files.

	use npi year age cms_spec_code if inrange(age,40,70) using "${clean_data}/panel_physicians_clean.dta", clear
	encode cms_spec_code, gen(cms_spec_code_num)
	drop cms_spec_code

	* Ages 40-55 (preserve/restore so the pooled sample is read only once)
	preserve
		keep if inrange(age,40,55)
		save "${temp}/npi_list_age40to55.dta", replace
	restore

	* Ages 56-70
	keep if inrange(age,56,70)
	save "${temp}/npi_list_age56to70.dta", replace

***************************************************************************

* Process MPUPD data and create three datasets:

	* 1) NPI-year-procedure-POS level data
	* 2) NPI-year level data
	* 3) NPI-year level simulated instrument that captures regulatory changes in prices

	* Stack the annual MPUPD files for 2012-2017 (2012 is loaded first, later
	* years are appended to it)

	local mpupd_dir "${intermediate_data}/MPUPD-PhysOtherSupplier"

	use "`mpupd_dir'/2012/Medicare_Provider_Util_Payment_PUF_CY2012.dta", clear

	forvalues yr = 2013/2017 {
		append using "`mpupd_dir'/`yr'/Medicare_Provider_Util_Payment_PUF_CY`yr'.dta"
	}

	* Discard columns that are not used below

	drop provider_type                    ///
	     medicare_participation_indicator ///
	     bene_day_srvc_cnt                ///
	     average_submitted_chrg_amt       ///
	     average_medicare_standard_amt

	* Attach the RVU value for each procedure x place of service x year;
	* only rows found in both datasets are kept

	rename hcpcs_code hcpcs
	merge m:1 hcpcs place_of_service year using "${temp}/rvu_values_hcpcs_pos_long.dta", keep(match) nogen

	* Row-level totals: payment per line x lines billed, and RVU value x lines billed

	gen revenue_hcpcs_pos   = average_medicare_payment_amt * line_srvc_cnt
	gen total_rvu_hcpcs_pos = rvu_value_hcpcs_pos * line_srvc_cnt

	* Put the identifiers first and assert that they uniquely identify rows

	order npi year hcpcs place_of_service
	gisid npi year hcpcs place_of_service

	* Dataset 1) NPI-year-procedure-POS level data
	save "${intermediate_data}/RVU/npi_hcpcs_pos_panel.dta", replace
	
	* Collapse the procedure-level data in memory to one row per NPI-year.
	* Each summary is first computed within NPI-year on every row; the rows are
	* then reduced to one per NPI-year by dropping exact duplicates.

	local doc_yr "npi year"

	* Total revenue
	gegen revenue_doc_yr = total(revenue_hcpcs_pos), by(`doc_yr')

	* Total number of RVUs
	gegen total_rvu_doc_yr = total(total_rvu_hcpcs_pos), by(`doc_yr')

	* Total lines billed
	gegen lines_doc_yr = total(line_srvc_cnt), by(`doc_yr')

	* Largest unique-patient count across the NPI-year's rows
	gegen maxpatient_doc_yr = max(bene_unique_cnt), by(`doc_yr')

	* Number of rows (procedure-POS combinations) in the NPI-year
	gegen unique_hcpcs_pos_doc_yr = count(_n), by(`doc_yr')

	* Highest average Medicare payment across the NPI-year's rows
	gegen maxmcrpay_doc_yr = max(average_medicare_payment_amt), by(`doc_yr')

	* Lowest average Medicare payment across the NPI-year's rows
	gegen minmcrpay_doc_yr = min(average_medicare_payment_amt), by(`doc_yr')

	* Dataset 2) NPI-year level data

	keep npi year *_doc_yr
	gduplicates drop
	gisid npi year
	save "${intermediate_data}/RVU/npi_level_mpupd_panel.dta", replace
	
	* Dataset 3) Simulated instrument: RVUs in each year evaluated at a fixed,
	* time-invariant quantity per NPI x procedure x POS

	use "${intermediate_data}/RVU/npi_hcpcs_pos_panel.dta", clear

	* Bring in the RVU value of every year (wide columns) for each procedure-POS
	merge m:1 hcpcs place_of_service using "${temp}/rvu_values_hcpcs_pos_wide.dta", keep(match) nogen

	* Reference quantity: mean line count of the NPI x procedure x POS cell over
	* the years in which it appears in the panel
	gegen line_srvc_cnt_mean = mean(line_srvc_cnt), by(npi hcpcs place_of_service)

	* Reduce to one row per NPI x procedure x POS. The "2*" wildcard selects the
	* year-suffixed wide RVU columns and leaves out the unsuffixed long one.
	keep npi hcpcs place_of_service rvu_value_hcpcs_pos2* line_srvc_cnt_mean
	gduplicates drop
	gisid npi hcpcs place_of_service

	* For each year 2010-2019: RVUs at the fixed quantity for every
	* procedure-POS, then summed within physician
	* NOTE(review): total() presumably counts a missing RVU as zero, so a
	* procedure with no RVU value in a given year would contribute nothing to
	* that year's sum - confirm this is intended.

	forvalues yr = 2010/2019 {
		gen rvu`yr'_atfixq_hcpcs_pos = rvu_value_hcpcs_pos`yr' * line_srvc_cnt_mean
		gegen rvu`yr'_atfixq_doc_yr = total(rvu`yr'_atfixq_hcpcs_pos), by(npi)
	}

	* One row per physician, then reshape to physician-year
	* (the value column is named rvu_atfixq_doc_yr after the reshape)

	keep npi *_doc_yr
	gduplicates drop
	reshape long rvu@_atfixq_doc_yr, i(npi) j(year)
	save "${intermediate_data}/RVU/rvus_instrument.dta", replace
